*Work from the project folder (machine-specific absolute path); all file names below are relative to it
cd "/Users/hausfath/Desktop/Climate Science/CRN/CRN HCN Project/"

*Maximum allowed separation between the two stations of a pair, in miles; also embedded in the output file name
global distance = 2000

*This file generates the CRN-only station pairs used in the paper.
*It calculates all permutations of CRN station pairs (both members are drawn from uscrn_metadata.dta) within $distance miles of each other that meet the selection criteria.

*Load the CRN station metadata; this copy supplies the "_v1" member of every pair
local meta_cols station_id lon lat start_date end_date
use `meta_cols' using uscrn_metadata.dta, clear
order `meta_cols'

*Suffix each retained column with _v1 so it stays distinct once the two copies are crossed
foreach col of local meta_cols {
	rename `col' `col'_v1
}

*Hold the _v1 copy in a temporary file until it is crossed with the second copy
tempfile crn_stations_v1
save "`crn_stations_v1'", replace

*Load a second copy of the same CRN station metadata; this copy supplies the "_v2" member of every pair
local meta_cols station_id lon lat start_date end_date
use `meta_cols' using uscrn_metadata.dta, clear
order `meta_cols'

*Suffix each retained column with _v2
foreach col of local meta_cols {
	rename `col' `col'_v2
}

*Form every (v2, v1) combination of CRN stations; this includes each station paired with itself
*and both orderings of every pair
cross using "`crn_stations_v1'"

*Compute the great-circle distance, in miles, between the two stations of every pair.

*Hold the coordinates in double precision so the radian values are not rounded to float
recast double lat_v2 lon_v2 lat_v1 lon_v1

*Convert the lat/lon values from degrees to radians (in place)
foreach coord of varlist lat_v2 lon_v2 lat_v1 lon_v1 {
	replace `coord' = `coord' * _pi/180
}

*Constants for the distance formula
local earth_radius_km = 6371
local km_to_miles = 0.621371192

*Use the spherical law of cosines formula to calculate the distance between pairs.
*The cosine of the central angle is computed first so that it can be clamped to [-1, 1]:
*for stations with identical (or nearly identical) coordinates, rounding can push it
*marginally above 1, and acos() returns missing outside its domain, which would make the
*pair fail the distance filter silently.
tempvar cos_angle
gen double `cos_angle' = sin(lat_v1)*sin(lat_v2) + 		///
               cos(lat_v1)*cos(lat_v2) * 				///
               cos(lon_v2-lon_v1)

*Clamp with explicit conditions rather than max()/min(): those functions ignore missing
*values and would turn a pair with missing coordinates into a distance of zero.
replace `cos_angle' = 1 if `cos_angle' > 1 & !missing(`cos_angle')
replace `cos_angle' = -1 if `cos_angle' < -1

*Central angle (radians) * Earth radius (km) * km-to-miles factor; missing coordinates give a missing distance
gen double distance = acos(`cos_angle') * `earth_radius_km' * `km_to_miles'

*Remove pairs from the dataset if they fail the following criteria

*The separation must not exceed the $distance-mile limit (a missing distance also fails this test)
keep if distance <= $distance
*A station may not be paired with itself
drop if station_id_v1 == station_id_v2

*Both stations must have an end date of 653 or later; a missing end date is kept,
*because missing compares greater than any number
*NOTE(review): 552 and 653 look like Stata monthly dates (552 = 2006m1, 653 = 2014m6) -- confirm against uscrn_metadata.dta
drop if end_date_v2 < 653
drop if end_date_v1 < 653

*Both stations must have a start date of 552 or earlier; a missing start date is dropped
drop if start_date_v2 > 552
drop if start_date_v1 > 552


keep station_id_v1 station_id_v2 distance
*duplicates drop distance, force

*Sort on the full pair key, not station_id_v1 alone: Stata orders tied observations
*randomly, so sorting on a non-unique key would assign different station_pair_id values
*from one run to the next.
sort station_id_v1 station_id_v2 distance
gen station_pair_id = _n
save crn_only_station_pairs_$distance.dta, replace
